Exploratory analysis of Kobe Bryant's shot data



In [2]:

    
import pandas as pd
import numpy as np
%matplotlib inline



In [3]:

    
# Read the input CSV file into a pandas DataFrame.
# The path is written as a raw string so the Windows backslashes are taken
# literally; the non-raw form relied on invalid escape sequences such as
# "\B" and "\F", which newer Python versions warn about.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory.
df = pd.read_csv(r"D:\Box Sync\Fall_2016\data_vis\project\data\data.csv")



In [4]:

    
# Preview the first five rows (five is the default for head()).
df.head()









    Out[4]:






  
    
      
      action_type
      combined_shot_type
      game_event_id
      game_id
      lat
      loc_x
      loc_y
      lon
      minutes_remaining
      period
      ...
      shot_type
      shot_zone_area
      shot_zone_basic
      shot_zone_range
      team_id
      team_name
      game_date
      matchup
      opponent
      shot_id
    
  
  
    
      0
      Jump Shot
      Jump Shot
      10
      20000012
      33.9723
      167
      72
      -118.1028
      10
      1
      ...
      2PT Field Goal
      Right Side(R)
      Mid-Range
      16-24 ft.
      1610612747
      Los Angeles Lakers
      2000-10-31
      LAL @ POR
      POR
      1
    
    
      1
      Jump Shot
      Jump Shot
      12
      20000012
      34.0443
      -157
      0
      -118.4268
      10
      1
      ...
      2PT Field Goal
      Left Side(L)
      Mid-Range
      8-16 ft.
      1610612747
      Los Angeles Lakers
      2000-10-31
      LAL @ POR
      POR
      2
    
    
      2
      Jump Shot
      Jump Shot
      35
      20000012
      33.9093
      -101
      135
      -118.3708
      7
      1
      ...
      2PT Field Goal
      Left Side Center(LC)
      Mid-Range
      16-24 ft.
      1610612747
      Los Angeles Lakers
      2000-10-31
      LAL @ POR
      POR
      3
    
    
      3
      Jump Shot
      Jump Shot
      43
      20000012
      33.8693
      138
      175
      -118.1318
      6
      1
      ...
      2PT Field Goal
      Right Side Center(RC)
      Mid-Range
      16-24 ft.
      1610612747
      Los Angeles Lakers
      2000-10-31
      LAL @ POR
      POR
      4
    
    
      4
      Driving Dunk Shot
      Dunk
      155
      20000012
      34.0443
      0
      0
      -118.2698
      6
      2
      ...
      2PT Field Goal
      Center(C)
      Restricted Area
      Less Than 8 ft.
      1610612747
      Los Angeles Lakers
      2000-10-31
      LAL @ POR
      POR
      5
    
  

5 rows × 25 columns



In [7]:

    
# Discard every row that contains at least one missing value.
# Rebinding `df` instead of using inplace=True avoids mutating the frame
# that earlier cells loaded and displayed.
df = df.dropna()



In [11]:

    
# Columns to be removed from the processed dataset.
remove_columns = ['game_event_id', 'game_id', 'lat', 'lon', 'team_id', 'game_date', 'shot_id']
# Drop the columns, rebinding `df` rather than mutating in place.
# errors="ignore" keeps this cell re-runnable: on a second execution the
# columns are already gone, which would otherwise raise a KeyError.
df = df.drop(columns=remove_columns, errors="ignore")
# Preview the trimmed frame.
df.head()



In [12]:

    
# Write the processed frame to a CSV file, omitting the index column.
# The path is written as a raw string so the Windows backslashes are taken
# literally; the non-raw form relied on invalid escape sequences such as
# "\B" and "\F", which newer Python versions warn about.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory.
df.to_csv(r"D:\Box Sync\Fall_2016\data_vis\project\data\processed_data.csv", index=False)



In [ ]:

	action_type	combined_shot_type	game_event_id	game_id	lat	loc_x	loc_y	lon	minutes_remaining	period	...	shot_type	shot_zone_area	shot_zone_basic	shot_zone_range	team_id	team_name	game_date	matchup	opponent	shot_id
0	Jump Shot	Jump Shot	10	20000012	33.9723	167	72	-118.1028	10	1	...	2PT Field Goal	Right Side(R)	Mid-Range	16-24 ft.	1610612747	Los Angeles Lakers	2000-10-31	LAL @ POR	POR	1
1	Jump Shot	Jump Shot	12	20000012	34.0443	-157	0	-118.4268	10	1	...	2PT Field Goal	Left Side(L)	Mid-Range	8-16 ft.	1610612747	Los Angeles Lakers	2000-10-31	LAL @ POR	POR	2
2	Jump Shot	Jump Shot	35	20000012	33.9093	-101	135	-118.3708	7	1	...	2PT Field Goal	Left Side Center(LC)	Mid-Range	16-24 ft.	1610612747	Los Angeles Lakers	2000-10-31	LAL @ POR	POR	3
3	Jump Shot	Jump Shot	43	20000012	33.8693	138	175	-118.1318	6	1	...	2PT Field Goal	Right Side Center(RC)	Mid-Range	16-24 ft.	1610612747	Los Angeles Lakers	2000-10-31	LAL @ POR	POR	4
4	Driving Dunk Shot	Dunk	155	20000012	34.0443	0	0	-118.2698	6	2	...	2PT Field Goal	Center(C)	Restricted Area	Less Than 8 ft.	1610612747	Los Angeles Lakers	2000-10-31	LAL @ POR	POR	5